#################################################################################### 
####################################################################################
####################################################################################
# Analyze HH-Level Data
####################################################################################
####################################################################################
library(here)
library(kableExtra)
library(RItools)
####################################################################################
####################################################################################
## Read in survey data (from: analysis_balance.R)
aej = read.csv(here("data/AEJ2018_HHmain_id_attrition.csv"))

####################################################################################
####################################################################################
## Table D.5

## Village
# Omnibus test

# Slice up data into the normal sample (without attrition) and the control sample (with attrition)
aejc = aej[aej$treatment == 0,]
aej = aej[aej$attrition == 0,]

aej = aej[! duplicated(aej$villageid), c("village_hhs_baseline", "village_hhsu5_baseline", "village_distance_road", "village_distance_electricity", "village_distance_HF",
                                         "village_HF_within5km", "village_distance_HOSP", "treatment", "villageid", "branchid", "attrition")]

model = formula("treatment ~ village_hhs_baseline + village_hhsu5_baseline + village_distance_road + village_distance_electricity + village_distance_HF + village_HF_within5km + village_distance_HOSP")
balance_test = xBalance(model,
                        strata = factor(aej$branchid), #list(noblocks=NULL)
                        data = aej, na.rm = T,
                        report=c("adj.means","adj.mean.diffs","z.scores","chisquare.test","p.values"))
rownames(balance_test$results) = c("Households per cluster","Households w/under-5","Distance main road", "Distance transmission line",
                                   "Distance health center","Health centers w/in 5km","Distance to hospital")
rownames(balance_test$overall) = c("Overall Test Statistic")

kable(round(balance_test$results,2),align = "c", booktabs = T, caption = "Village-Level Block-Adjusted Omnibus Balance Test", 
      col.names = c("Control Mean", "Treatment Mean", "Difference","Z-score","P-value"), format = "latex")
kable(round(balance_test$overall,2), align = "c", row.names = T,
      col.names = c("Chi-Sq", "Df", "P-value"), format = "latex")

# Joint Orthogonality Test
for (i in names(aej[c("treatment")])){ # Dependent Variables
  model = paste(i,"~","village_hhs_baseline + village_hhsu5_baseline + village_distance_road + village_distance_electricity + village_distance_HF + village_HF_within5km + village_distance_HOSP")
  # Run each model
  assign(x = paste("m",i, sep = "."), 
         value = lm(as.formula(model), data = aej))
  # Output clustered SEs (county)
  assign(x = paste("c",i, sep = "."), 
         value = coeftest(lm(as.formula(model), data = aej),
                          cluster.vcov(lm(as.formula(model), data = aej), aej$villageid)))
}

lapply(grep("^c\\.", ls(), value = T), get)
rm(list = grep("^(c|m)\\.", ls(), value = T))
rm(aej,balance_test, i, model)

####################################################################################
####################################################################################
## Table D.6

# Slice up data into the normal sample (without attrition) and the control sample (with attrition)
aej = aejc

aej = aej[! duplicated(aej$villageid), c("village_hhs_baseline", "village_hhsu5_baseline", "village_distance_road", "village_distance_electricity", "village_distance_HF",
                                         "village_HF_within5km", "village_distance_HOSP", "treatment", "villageid", "branchid", "attrition")]

model = formula("attrition ~ village_hhs_baseline + village_hhsu5_baseline + village_distance_road + village_distance_electricity + village_distance_HF + village_HF_within5km + village_distance_HOSP")
balance_test = xBalance(model,
                        strata = factor(aej$branchid), #list(noblocks=NULL)
                        data = aej, na.rm = T,
                        report=c("adj.means","adj.mean.diffs","z.scores","chisquare.test","p.values"))
rownames(balance_test$results) = c("Households per cluster","Households w/under-5","Distance main road", "Distance transmission line",
                                   "Distance health center","Health centers w/in 5km","Distance to hospital")
rownames(balance_test$overall) = c("Overall Test Statistic")

kable(round(balance_test$results,2),align = "c", booktabs = T, caption = "Village-Level Block-Adjusted Omnibus Balance Test", 
      col.names = c("Control Mean", "Treatment Mean", "Difference","Z-score","P-value"), format = "latex")
kable(round(balance_test$overall,2), align = "c", row.names = T,
      col.names = c("Chi-Sq", "Df", "P-value"), format = "latex")

# Joint Orthogonality Test
for (i in names(aej[c("attrition")])){ # Dependent Variables
  model = paste(i,"~","village_hhs_baseline + village_hhsu5_baseline + village_distance_road + village_distance_electricity + village_distance_HF + village_HF_within5km + village_distance_HOSP")
  # Run each model
  assign(x = paste("m",i, sep = "."), 
         value = lm(as.formula(model), data = aej))
  # Output clustered SEs (county)
  assign(x = paste("c",i, sep = "."), 
         value = coeftest(lm(as.formula(model), data = aej),
                          cluster.vcov(lm(as.formula(model), data = aej), aej$villageid)))
}

lapply(grep("^c\\.", ls(), value = T), get)
rm(list = grep("^(c|m)\\.", ls(), value = T))
rm(aejc,balance_test, i, model)

####################################################################################